library(magrittr)
library(stringr)
library(plyr)
library(tidyverse)
library(readxl)
library(stargazer)


# NOTE(review): wiping the workspace inside a script is generally discouraged
# (it also destroys the caller's objects when the file is source()d). It is
# kept here because the rest of the file may rely on starting from a clean
# global environment.
rm(list = ls())

# Directory that holds the replication spreadsheets. Set the environment
# variable VNA_REPLICATION_HOME to run the script on another machine; when it
# is unset or empty, the original author's path is used unchanged.
home <- local({
  default_dir <- "C:/Users/Jason/Dropbox/VNA_Responsiveness/Analysis/JOP-dataverse/"
  configured_dir <- Sys.getenv("VNA_REPLICATION_HOME", unset = "")
  data_dir <- if (nzchar(configured_dir)) configured_dir else default_dir
  # File paths are built below as paste0(home, <file name>), so the directory
  # must end in exactly one separator regardless of how it was supplied.
  paste0(sub("[/\\\\]+$", "", data_dir), "/")
})


# Delegate-level education covariates: read the individual-outcomes workbook,
# expose its `Original` column under the name `ID` (the key used when merging
# into the survey data), and keep only the three education columns beside it.
delegates <- read_xlsx(paste0(home, "individual-outcomes.xlsx")) %>%
  transmute(ID = Original, EduCareer, EduYears, EduLevel)
# Survey outcomes, one row per respondent, left-joined to the delegate
# covariates on `ID` (respondents without a delegate record are kept).
#   Treatment : factor with level order Control < Citizen < Firm
#   Missing   : 1 when Q1 is NA, 0 otherwise
#   Dosage    : Prop.Citizen + Prop.Firm
survey <- read_xlsx(paste0(home, "survey-outcomes.xlsx")) %>%
  transform(
    Treatment = factor(Treatment, levels = c("Control", "Citizen", "Firm")),
    Missing = as.integer(is.na(Q1)),
    Dosage = Prop.Citizen + Prop.Firm
  ) %>%
  merge(delegates, by = "ID", all.x = TRUE)

# Collapse every dosage strictly between 0 and 1 to 0.5; NA dosages are left
# untouched because which() drops them.
survey$Dosage <- replace(
  survey$Dosage,
  which(survey$Dosage > 0 & survey$Dosage < 1),
  0.5
)
# Pooled speech outcomes, prepared the same way as the survey data:
# Treatment becomes a factor ordered Control < Citizen < Firm and Dosage is
# the sum of the two treatment proportions.
pooled_texts <- read_xlsx(paste0(home, "pooled-outcomes.xlsx")) %>%
  transform(
    Treatment = factor(Treatment, levels = c("Control", "Citizen", "Firm")),
    Dosage = Prop.Citizen + Prop.Firm
  )

# Collapse every dosage strictly between 0 and 1 to 0.5; NA dosages are left
# untouched because which() drops them.
pooled_texts$Dosage <- replace(
  pooled_texts$Dosage,
  which(pooled_texts$Dosage > 0 & pooled_texts$Dosage < 1),
  0.5
)

# Table 1: Balance and descriptive statistics
#
# Upper panel: mean, standard deviation and (for Q1 only) the number of
# missing answers, computed separately for each treatment arm. Rows come from
# three sources, stacked in this order:
#   1. survey covariates (rounded to 3 decimals, no NA handling),
#   2. survey item Q1 (NAs removed, NA count reported),
#   3. pooled speech outcomes (unrounded, no NA handling).
# The helper lives inside local() so that only `table_1_top` is added to the
# global environment.
table_1_top <- local({
  # One Mean/SD/NA triple per treatment arm, in factor-level order.
  arm_columns <- c("Mean-con", "SD-con", "NA-con",
                   "Mean-cit", "SD-cit", "NA-cit",
                   "Mean-firm", "SD-firm", "NA-firm")

  # Build one panel: a row per entry of `variables`, three columns per arm.
  #   data          data frame with a `Treatment` column
  #   variables     column names to summarise
  #   row_labels    printed row names, parallel to `variables`
  #   digits        round Mean/SD to this many decimals (NULL = no rounding)
  #   na.rm         forwarded to mean() and sd()
  #   count_missing TRUE puts the per-arm NA count in the NA column,
  #                 FALSE leaves that column as empty strings
  describe_by_arm <- function(data, variables, row_labels, digits = NULL,
                              na.rm = FALSE, count_missing = FALSE) {
    describe_arm <- function(x) {
      columns <- lapply(variables, function(v) x[[v]])
      means <- vapply(columns, mean, numeric(1), na.rm = na.rm)
      sds <- vapply(columns, sd, numeric(1), na.rm = na.rm)
      if (!is.null(digits)) {
        means <- round(means, digits)
        sds <- round(sds, digits)
      }
      # Size the placeholder explicitly rather than relying on data.frame()
      # recycling a shorter vector of empty strings.
      missing <- if (count_missing) {
        vapply(columns, function(values) sum(is.na(values)), integer(1))
      } else {
        rep("", length(variables))
      }
      data.frame(Mean = means, SD = sds, Missing = missing,
                 stringsAsFactors = FALSE)
    }

    subset(data, !is.na(Treatment)) %>%
      dlply("Treatment", describe_arm) %>%
      do.call(cbind, .) %>%
      set_rownames(row_labels) %>%
      set_colnames(arm_columns)
  }

  panels <- list(
    describe_by_arm(
      survey,
      c("FullTime", "CentNom", "Competitive",
        "EduCareer", "EduYears", "EduLevel"),
      row_labels = c("Full-time", "Central Nominee", "Competitively elected",
                     "Educational career", "Years of education",
                     "Level of education"),
      digits = 3
    ),
    describe_by_arm(
      survey, "Q1",
      row_labels = "Prepared for debate",
      na.rm = TRUE, count_missing = TRUE
    ),
    describe_by_arm(
      pooled_texts,
      c("Spoke", "said_own_province"),
      row_labels = c("Spoke", "Mentioned province")
    )
  )

  # Stack the panels one after another, exactly as successive rbind() calls.
  Reduce(rbind, panels)
})

# Table 1, lower panel: balance tests. For every variable, a two-sided
# unequal-variance (var.equal = FALSE) two-sample t-test is run for each
# pairwise comparison of treatment arms, and the p-value and t statistic are
# reported:
#   *_1 = Control vs Citizen, *_2 = Control vs Firm, *_3 = Citizen vs Firm.
# The blank_* columns are empty spacers that keep the layout parallel to the
# three-columns-per-arm upper panel. Only the survey covariate rows are
# rounded to 3 decimals; the Q1 and pooled-text rows keep full precision.
# Everything temporary lives inside local(), so no test objects are left in
# the global environment and no cleanup rm() is needed afterwards.
table_1_bottom <- local({
  # Values of `variable` for the rows of `data` assigned to `arm`
  # (rows with a missing Treatment are never selected).
  arm_values <- function(data, variable, arm) {
    in_arm <- !is.na(data$Treatment) & data$Treatment == arm
    data[[variable]][in_arm]
  }

  # t-test comparing `variable` between two treatment arms.
  compare_arms <- function(data, variable, first_arm, second_arm) {
    t.test(x = arm_values(data, variable, first_arm),
           y = arm_values(data, variable, second_arm),
           alternative = "two.sided", mu = 0, var.equal = FALSE,
           conf.level = 0.95)
  }

  # One table row for `variable`; `digits = NULL` leaves values unrounded.
  balance_row <- function(data, variable, digits = NULL) {
    tidy <- function(value) {
      if (is.null(digits)) value else round(value, digits)
    }
    control_citizen <- compare_arms(data, variable, "Control", "Citizen")
    control_firm <- compare_arms(data, variable, "Control", "Firm")
    citizen_firm <- compare_arms(data, variable, "Citizen", "Firm")
    data.frame(
      p.value_1 = tidy(control_citizen$p.value),
      t.stat_1 = tidy(control_citizen$statistic),
      blank_1 = "",
      p.value_2 = tidy(control_firm$p.value),
      t.stat_2 = tidy(control_firm$statistic),
      blank_2 = "",
      p.value_3 = tidy(citizen_firm$p.value),
      t.stat_3 = tidy(citizen_firm$statistic),
      blank_3 = "",
      stringsAsFactors = FALSE
    )
  }

  covariate_rows <- ldply(
    c("FullTime", "CentNom", "Competitive",
      "EduCareer", "EduYears", "EduLevel"),
    function(variable) balance_row(survey, variable, digits = 3)
  ) %>%
    set_rownames(c("Full-time", "Central Nominee", "Competitively elected",
                   "Educational career", "Years of education",
                   "Level of education"))

  debate_row <- balance_row(survey, "Q1") %>%
    set_rownames("Prepared for debate")

  speech_rows <- ldply(
    c("Spoke", "said_own_province"),
    function(variable) balance_row(pooled_texts, variable)
  ) %>%
    set_rownames(c("Spoke", "Mentioned province"))

  # Stack in the printed order, exactly as successive rbind() calls.
  Reduce(rbind, list(covariate_rows, debate_row, speech_rows))
})
# Print both panels of Table 1 with stargazer. `summary = FALSE` makes
# stargazer output the data frames as they are instead of computing summary
# statistics from them. Only the lower panel carries the table title.
stargazer(
  table_1_top,
  summary = FALSE,
  align = TRUE,
  no.space = TRUE,
  label = "tab1_top"
)
stargazer(
  table_1_bottom,
  summary = FALSE,
  title = "Randomization achieved balance across treatment conditions. Top panel displays summary statistics; lower panel demonstrates balance across treatment arms.",
  align = TRUE,
  no.space = TRUE,
  label = "tab1_bottom"
)
